/*****Section 1: Prepare data for estimation*****/

// Start the session log. Close any log left open by an earlier, interrupted
// run first; otherwise -log using- stops with "log file already open".
capture log close
log using log_recode, replace

/*Dataset has 8,079 variables and 97,006 observations*/
// -set maxvar- must be issued with no data in memory, so empty memory first.
clear
set maxvar 10000
// -clear- on -use- keeps the script re-runnable within the same session.
use "original_dataset/BES2019_W19_Panel_v0.5.dta", clear


//drop 477 observations that had an ID in the dataset but were not in any wave nor had any data collected. (entire row was missing for each)
//now have 96,529 observations
// num_waves = row sum over every variable matching wave* (missing counts as 0 in rowtotal).
egen num_waves = rowtotal(wave*)
drop if num_waves == 0


//create recoded variable for dutyToVote2W*
// For every wave 1-19 that carries a dutyToVote2W<wave> item, build
// rec_dutyToVote2W<wave> as a copy of the item with these overrides:
//    0 = don't know (source value 9999)
//   -1 = respondent was in the wave (wave<wave> == 1) but the item is system-missing
//   -2 = respondent was not in the wave (wave<wave> == 0)
// Waves without the item are reported in the log and skipped.

forvalues v = 1/19 {
    // -exact- stops confirm from accepting an abbreviation of another wave's variable
    capture confirm variable dutyToVote2W`v', exact

    if !_rc {
        gen rec_dutyToVote2W`v' = dutyToVote2W`v'

        //don't knows
        replace rec_dutyToVote2W`v' = 0 if dutyToVote2W`v' == 9999

        //people who were in wave but did not answer civic duty question
        replace rec_dutyToVote2W`v' = -1 if dutyToVote2W`v' == . & wave`v' == 1

        //people who were not in wave
        replace rec_dutyToVote2W`v' = -2 if wave`v' == 0

        label value rec_dutyToVote2W`v' voteNormsGridlbl
        label variable rec_dutyToVote2W`v' "Recoded Civic Duty to include respondents in wave but not asked civic duty question"

        // keep the recode next to its source item
        order rec_dutyToVote2W`v', after(dutyToVote2W`v')
    }
    else {
        // the original message opened a macro quote that was never closed
        display "dutyToVote2W`v' does not exist"
    }
}






// Add up the participation flags of waves 1-4, 6-8, 12, 14, 15, 17 and 18.
// A missing flag in any of these waves makes the total missing, as with a plain "+" chain.
gen civic_duty_numwaves = 0
foreach w of numlist 1/4 6/8 12 14 15 17 18 {
    replace civic_duty_numwaves = civic_duty_numwaves + wave`w'
}
label variable civic_duty_numwaves "Number of waves respondent was in that asked civic duty questions"






/////list of waves where respondents answered the civic duty questions
// civic_duty_waves collects "-<wave>" for each wave in which the respondent was
// present and dutyToVote2W<wave> is not system-missing (e.g. "-1-2-6");
// civic_duty_numwaves_answered counts those same waves.
gen civic_duty_waves = ""
label variable civic_duty_waves "List of waves where answered the civic duty questions"
gen civic_duty_numwaves_answered = 0
label variable civic_duty_numwaves_answered "Number of waves where answered the civic duty questions"

// NOTE(review): this loop stops at wave 18 while the recode loops run to 19 - confirm that is intended.
forvalues v = 1/18 {
    // -exact- stops confirm from accepting an abbreviation of another wave's variable
    capture confirm variable dutyToVote2W`v', exact

    if !_rc {
        replace civic_duty_waves = civic_duty_waves + "-" + "`v'" if wave`v' == 1 & dutyToVote2W`v' != .

        replace civic_duty_numwaves_answered = civic_duty_numwaves_answered + 1 if wave`v' == 1 & dutyToVote2W`v' != .
    }
    else {
        // the original message opened a macro quote that was never closed
        display "dutyToVote2W`v' does not exist"
    }
}

/**1635 different combinations for waves that respondents were interviewed**/
tab civic_duty_waves, sort
tab civic_duty_numwaves_answered, sort
tab civic_duty_waves if civic_duty_numwaves_answered > 1, sort




/* Long-form wording, kept for reference only (not executed):
   label define voteNormsGridlbl -2 "Not in Wave" -1 "Missing due to routing/skipping" 0 "Don't Know" 1 " Strongly disagree" 2 "Disagree" 3 "Neither agree nor disagree" 4 "Agree" 5 "Strongly agree" */

// Short labels attached to the rec_* recodes, one code per line.
label define voteNormsGridlbl ///
    -2 "Not'Wave"             ///
    -1 "Missing"              ///
     0 "D'Know"               ///
     1 " S'Disagree"          ///
     2 "Disagree"             ///
     3 "Neither"              ///
     4 "Agree"                ///
     5 "S'Agree"




//create recoded variable for socialPressureVoteW*
// For every wave 1-19 that carries a socialPressureVoteW<wave> item, build
// rec_socialPressureVoteW<wave> as a copy of the item with these overrides:
//    0 = don't know (source value 9999)
//   -1 = respondent was in the wave (wave<wave> == 1) but the item is system-missing
//   -2 = respondent was not in the wave (wave<wave> == 0)
// Waves without the item are reported in the log and skipped.

forvalues v = 1/19 {
    // -exact- stops confirm from accepting an abbreviation of another wave's variable
    capture confirm variable socialPressureVoteW`v', exact

    if !_rc {
        gen rec_socialPressureVoteW`v' = socialPressureVoteW`v'

        //don't knows
        replace rec_socialPressureVoteW`v' = 0 if socialPressureVoteW`v' == 9999

        //people who were in wave but did not answer social pressure question
        replace rec_socialPressureVoteW`v' = -1 if socialPressureVoteW`v' == . & wave`v' == 1

        //people who were not in wave
        replace rec_socialPressureVoteW`v' = -2 if wave`v' == 0

        label value rec_socialPressureVoteW`v' voteNormsGridlbl
        label variable rec_socialPressureVoteW`v' "Recoded Social Pressure to include respondents in wave but not asked social pressure question"
    }
    else {
        // the original message opened a macro quote that was never closed
        display "socialPressureVoteW`v' does not exist"
    }
}




  
 /*create variable with start and end dates for interview as number of days since Jan. 1, 1960.  This is used to account for the difference in time between interviews across individuals and waves. */

// For each of the 19 waves, derive day-count versions of the interview start
// and end timestamps with dofc() and place each next to its source variable.
forvalues w = 1/19 {
    // interview start
    gen startDayFrm_Jan1_1960W`w' = dofc(starttimeW`w')
    label variable startDayFrm_Jan1_1960W`w' "Days between Jan. 1, 1960 and Start Date of Interview"
    format startDayFrm_Jan1_1960W`w' %15.0f
    order startDayFrm_Jan1_1960W`w', after(starttimeW`w')

    // interview end
    gen endDayFrm_Jan1_1960W`w' = dofc(endtimeW`w')
    label variable endDayFrm_Jan1_1960W`w' "Days between Jan. 1, 1960 and End Date of Interview"
    format endDayFrm_Jan1_1960W`w' %15.0f
    order endDayFrm_Jan1_1960W`w', after(endtimeW`w')
}





/* Value label for the country codes: 1 = England, 2 = Scotland, 3 = Wales */
label define countrylbl 1 "England" 2 "Scotland" 3 "Wales"

// Attach the label to the country variable of each of the 19 waves.
forvalues w = 1/19 {
    label values countryW`w' countrylbl
}



/***Kept only a subset of variables**/

//reduced number of variables from 8000+ to 711; 96,529 observations

// One thematic group per continuation line; the variable list itself is unchanged.
keep id wave* starttimeW* startDayFrm_Jan1_1960W* endtimeW* endDayFrm_Jan1_1960W* ///
    /*dependent variable*/ dutyToVote2W* ///
    /*recoded civic duty variable*/ rec_dutyToVote2W* ///
    /*calculated values about number of waves*/ civic_duty_numwaves civic_duty_numwaves_answered civic_duty_waves ///
    /*survey weights*/ wt_full_W1 wt_full_W2 wt_full_W3 wt_full_W4 wt_full_W5 ///
        wt_new_W6 wt_new_W7 wt_new_W8 wt_new_W9 wt_new_W10 wt_new_W11 wt_new_W12 ///
        wt_new_W13_result wt_new_W14 wt_new_W15 wt_new_W16 wt_new_W17 wt_new_W18 wt_new_W19_result ///
    /*most people that I know vote??*/ socialPressureVoteW* ///
    /*recoded social pressure to vote variable*/ rec_socialPressureVoteW* ///
    /*measures of efficacy*/ efficacyUnderstandW* efficacyTooMuchEffortW* efficacyNotUnderstandW* ///
        efficacyPolCareW* efficacyVoteEffortW* efficacyEnjoyVoteW* efficacyNoMatterW* efficacyEUW* ///
        voteMakesDifferenceW* ///
    /*turnout*/ turnoutUKGeneralW* localTurnoutW* localTurnoutRetroW* genElecTurnoutRetroW* ///
        genElecTurnoutRetro2017W18 ///
    /*EU turnout and other attitudes towards EU*/ euroTurnoutW* euRefVoteW* euRefTurnoutRetroW* ///
        euRefTurnoutW* p_eurefturnout ///
    /*party and ideological identification*/ partyIdStrengthW* partyIdW* partyIdSqueezeW* ///
        pidWeTheyW* leftRightW* ///
    /*demographics*/ gender ageW* p_socgradeW* p_work_statW* p_ethnicityW* p_gross_householdW* ///
        p_gross_personalW* p_housingW* p_maritalW* p_religionW* p_edlevelW* ukCitizenW* ///
        euCitizenW* commonwealthCitizenW* otherCitizenW* countryW* ///
    /*satisfaction with democracy*/ expectGoodConductGeneralW* expectGoodConductEURefW* satDemEUW* ///
        satDemUKW* satDemScotW* satDemWalesW* satDemEngW* satDemEUW* ///
    /*trust in government*/ trustMPsW* trustYourMPW* trustParliament* trustCourtsW* trustWestminsterW* ///
    /*general trust in people*/ genTrustW*



/*****Section 2: Export data as .csv file for use in Fortran code to estimate our models*****/

// nolabel writes the numeric codes rather than the value-label text.
export delimited using "subset_panel/BES2019_W19_Panel_SubsetVariables", nolabel replace


/*****Section 3: Prepare data to estimate comparison models *****/

/**Keep only variables used in models, then convert data to long format**/

// Wave-varying stubs: each one names a family <stub>1, <stub>2, ... in the wide data.
// The same list drives both the keep and the reshape so the two cannot drift apart.
local wave_stubs startDayFrm_Jan1_1960W starttimeW endDayFrm_Jan1_1960W endtimeW ///
    partyIdW trustMPsW efficacyUnderstandW leftRightW partyIdSqueezeW partyIdStrengthW ///
    dutyToVote2W efficacyPolCareW satDemUKW rec_dutyToVote2W efficacyNotUnderstandW ///
    p_ethnicityW p_edlevelW p_religionW countryW p_gross_householdW ageW

// Turn every stub into its wildcard pattern (<stub>*) for keep.
local wave_patterns
foreach stub of local wave_stubs {
    local wave_patterns `wave_patterns' `stub'*
}

// id and gender are the only non-wave-specific variables retained.
keep id gender `wave_patterns'

// One row per respondent-wave; the wave number lands in the new variable "wave".
reshape long `wave_stubs', i(id) j(wave)



/*recoding variables */

// religious: 1 when the religion code lies strictly between 1 and 19;
// 0 when it is 1, 19 or system-missing; any other value is carried over as is.
tab p_religionW, missing
generate religious = p_religionW
replace religious = 1 if p_religionW > 1 & p_religionW < 19
replace religious = 0 if inlist(p_religionW, 1, 19) | p_religionW == .

tab p_religionW religious, missing


// rec_gender: copy of gender with system-missing mapped to 0.
codebook gender
generate rec_gender = cond(gender == ., 0, gender)

tab rec_gender gender, missing


// Civic duty outcome: keep only the substantive answers (codes 1 and above);
// the recode's 0 / -1 / -2 categories become missing.
codebook rec_dutyToVote2W

generate rec_dutytovote2 = rec_dutyToVote2W
replace rec_dutytovote2 = . if rec_dutyToVote2W < 1

tab  rec_dutyToVote2W rec_dutytovote2


// Attitude items: copy each source item under a lower-case name and turn the
// 9999 code into missing. Pairs are matched by position in the two lists.
local raw_items trustMPsW satDemUKW efficacyUnderstandW efficacyPolCareW
local clean_items trustmps satdemuk efficacyunderstand efficacypolcare

forvalues k = 1/4 {
    local raw : word `k' of `raw_items'
    local clean : word `k' of `clean_items'

    codebook `raw'

    generate `clean' = `raw'
    replace `clean' = . if `raw' == 9999
    tab `clean' `raw'
}



// Education: collapse p_edlevelW into 0 (system-missing), 1 (codes 0-1),
// 2 (codes 2-3), 3 (code 4) and 4 (code 5).
codebook p_edlevelW
generate p_edlevel = p_edlevelW

replace p_edlevel = 0 if p_edlevelW == .
replace p_edlevel = 1 if inlist(p_edlevelW, 0, 1)
replace p_edlevel = 2 if inlist(p_edlevelW, 2, 3)
replace p_edlevel = 3 if p_edlevelW == 4
replace p_edlevel = 4 if p_edlevelW == 5

tab p_edlevelW p_edlevel


// Household income: collapse the 17 source codes into three bands plus a
// "missing" category coded 0.
tab p_gross_householdW

generate p_gross_household = p_gross_householdW

//less than 25k
replace p_gross_household = 1 if inlist(p_gross_householdW, 1, 2, 3, 4, 5)

//25k and less than 50k
replace p_gross_household = 2 if inlist(p_gross_householdW, 6, 7, 8, 9, 10)

//50k or more
replace p_gross_household = 3 if inlist(p_gross_householdW, 11, 12, 13, 14, 15)

//income missing
replace p_gross_household = 0 if inlist(p_gross_householdW, 16, 17)

tab p_gross_household p_gross_householdW

tab p_gross_household



/*If party id == 10 | 9999 used partyidsqueeze.*/

/*Final coding for party id: 0 - No Party, 1 - Conservative, 2-Labor, 3- Other Party*/
tab partyIdW

// Working copy: take the squeeze follow-up wherever the main item is 10 or 9999.
generate partyid_wip = partyIdW
replace partyid_wip = partyIdSqueezeW if partyIdW == 9999 | partyIdW == 10

tab partyid_wip partyIdSqueezeW

generate partyid = partyid_wip
replace partyid = 0 if partyid_wip == 10
replace partyid = 0 if partyid_wip == 9999
// Stata treats missing as larger than any number, so "partyid_wip > 2" alone
// would also code every person-wave with no party id as 3 (Other Party).
// The !missing() guard leaves those rows missing instead.
replace partyid = 3 if partyid_wip > 2 & partyid_wip != 9999 & partyid_wip != 10 & !missing(partyid_wip)

tab partyid_wip partyid, missing

/*Dummy variables*/
// Each dummy is 1 for its own category, 0 for the other three categories and
// missing wherever partyid itself is missing.

tab partyid,missing

generate no_party = .
replace no_party = 1 if partyid == 0
replace no_party = 0 if partyid == 1 | partyid == 2 | partyid == 3

tab partyid no_party,missing


generate conservative_party = .
replace conservative_party = 1 if partyid == 1
replace conservative_party = 0 if partyid == 0 | partyid == 2 | partyid == 3

tab partyid conservative_party,missing


generate labour_party = .
replace labour_party = 1 if partyid == 2
replace labour_party = 0 if partyid == 0 | partyid == 1 | partyid == 3

tab partyid labour_party,missing

generate other_party = .
replace other_party = 1 if partyid == 3
replace other_party = 0 if partyid == 0 | partyid == 1 | partyid == 2

tab partyid other_party,missing


/*Party id strength: 1 - Very strong, 2 - Fairly strong, 0 - Not very strong, not applicable (partyid = 0 OR partyid_wip == 10 |9999), missing (.)**/
tab partyIdStrengthW partyid_wip, missing
tab partyIdStrengthW, missing

// Codes 3 and 9999 and system-missing all fold into category 0.
generate partyidstrength = partyIdStrengthW
replace partyidstrength = 0 if inlist(partyIdStrengthW, 3, 9999) | partyIdStrengthW == .
tab partyidstrength partyIdStrengthW

tab partyidstrength partyid, missing


/*Dummy variables*/
// Each indicator is defined only where partyidstrength is 0, 1 or 2
// (missing otherwise) and equals 1 for its own category.

generate strong_partyidstrength = (partyidstrength == 1) if inlist(partyidstrength, 0, 1, 2)
tab partyidstrength strong_partyidstrength, missing


generate fstrong_partyidstrength = (partyidstrength == 2) if inlist(partyidstrength, 0, 1, 2)

tab partyidstrength fstrong_partyidstrength, missing

generate no_partyidstrength = (partyidstrength == 0) if inlist(partyidstrength, 0, 1, 2)


tab partyidstrength no_partyidstrength, missing



/*Generating covariance matrix to calculate portion of variance in each DV explained by partyid and partyidstrength */
// Covariances among three party dummies and two strength dummies; no_party and
// no_partyidstrength are not part of the variable list.
correlate conservative_party labour_party other_party strong_partyidstrength fstrong_partyidstrength, covariance
// Copy the covariance matrix returned by correlate so it can be written out.
matrix C = r(C) 

// Write the matrix to a CSV one directory above the working directory.
// NOTE(review): esttab is not a built-in command - presumably from the estout package; confirm it is installed.
// NOTE(review): the space in "matrix (C)" differs from the usual matrix(C) spelling - confirm it parses as intended.
esttab matrix (C) using "../cov_party_affil.csv", replace






// Copy of the wave index under the name wavefe.
generate wavefe = wave


codebook ageW
generate age = ageW


// Approximate birth year per person-wave: interview start year minus reported age.
generate startyear = year(startDayFrm_Jan1_1960W)

generate approx_byear = startyear - ageW


// Average the per-wave approximations within respondent and round down to get
// a single birth year per id. (bysort also leaves the data sorted by id.)
bysort id: egen meanbyear = mean(approx_byear)


generate byear = floor(meanbyear)


// Birth cohort: 4 = before 1946, 3 = 1946-1964, 2 = 1965-1980, 1 = after 1980,
// 0 = birth year unknown.
generate agecoh = 0

replace agecoh = 4 if byear < 1946

replace agecoh = 3 if byear >= 1946 &  byear <= 1964

replace agecoh = 2 if byear >= 1965 &  byear <= 1980

// Stata treats missing as larger than any number, so without the !missing()
// guard every respondent with an unknown birth year would be put in cohort 1.
replace agecoh = 1 if byear > 1980 & !missing(byear)


// Copy of the wave-specific country code under the name used in the models.
tab countryW
generate country = countryW
tab country countryW


/**save data as Stata .dta file to estimate comparison models*/
save "../BES2019_W19_Panel_SubsetVariables.dta", replace


log close
// -replace- lets the PDF be regenerated on re-runs, matching the replace used
// for the log, the CSV export and the saved .dta; without it translate stops
// as soon as log_recode.pdf already exists.
translate log_recode.smcl log_recode.pdf, replace

